/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include "BaseTokenStreamFixture.h"
#include "GreekAnalyzer.h"

using namespace Lucene;

// Fixture name used by every TEST_F case in this file; it is simply an alias
// for BaseTokenStreamFixture (presumably the provider of the checkAnalyzesTo /
// checkAnalyzesToReuse helpers the tests call -- confirm in that header).
typedef BaseTokenStreamFixture GreekAnalyzerTest;

/// Capitals and accented lowercase letters: a ten-word Greek sentence is
/// analyzed and compared with eight expected terms. All text is spelled out as
/// raw UTF-8 bytes so the source file itself stays pure ASCII.
TEST_F(GreekAnalyzerTest, testAnalyzer1) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input sentence, one word per group (ASCII transliteration in the comment);
    // 0x20 is the separating space.
    const uint8_t sentence[] = {
        // "Mia" (capital mu, accented iota)
        0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20,
        // "exairetika" (accented final alpha)
        0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9,
        0xce, 0xba, 0xce, 0xac, 0x20,
        // "kali" (accented eta)
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20,
        // "kai"
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20,
        // "plousia" (accented upsilon)
        0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20,
        // "seira" (accented alpha)
        0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20,
        // "charaktiron" (accented eta)
        0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81,
        0xcf, 0x89, 0xce, 0xbd, 0x20,
        // "tis" (final sigma)
        0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20,
        // "Ellinikis" (capital epsilon, accented eta, final sigma)
        0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae,
        0xcf, 0x82, 0x20,
        // "glossas" (accented omega, final sigma)
        0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82
    };

    // Expected terms: lowercase, unaccented, and with the final-sigma bytes
    // (0xcf 0x82) replaced by the regular sigma (0xcf 0x83). "kai" and "tis"
    // have no counterpart here (presumably removed as stop words).
    const uint8_t term1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; // "mia"
    const uint8_t term2[] = { // "exairetika"
        0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9,
        0xce, 0xba, 0xce, 0xb1
    };
    const uint8_t term3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; // "kali"
    const uint8_t term4[] = { // "plousia"
        0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1
    };
    const uint8_t term5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; // "seira"
    const uint8_t term6[] = { // "charaktiron"
        0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81,
        0xcf, 0x89, 0xce, 0xbd
    };
    const uint8_t term7[] = { // "ellinikis"
        0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb7,
        0xcf, 0x83
    };
    const uint8_t term8[] = { // "glossas"
        0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83
    };

    // Check that capitals and small accented letters are analyzed correctly.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesTo(analyzer, text,
                    newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                          UTF8_TO_STRING(term3), UTF8_TO_STRING(term4),
                                          UTF8_TO_STRING(term5), UTF8_TO_STRING(term6),
                                          UTF8_TO_STRING(term7), UTF8_TO_STRING(term8)));
}

/// Lowercase letters with diaeresis plus punctuation: the input mixes Greek
/// words with parentheses, brackets, tabs, a hyphen and runs of spaces, and
/// must produce exactly three terms.
TEST_F(GreekAnalyzerTest, testAnalyzer2) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input as raw UTF-8 bytes, grouped by word (ASCII transliteration in comments).
    const uint8_t sentence[] = {
        // "Proionta" (capital pi, iota with diaeresis, accented omicron) + space
        0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1,
        0x20,
        // "(kai)" followed by five spaces
        0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29,
        0x20, 0x20, 0x20, 0x20, 0x20,
        // "[pollaples]" (accented epsilon, final sigma)
        0x5b,
        0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad,
        0xcf, 0x82,
        0x5d,
        // tab, '-', tab
        0x09, 0x2d, 0x09,
        // "ANAGKES" (all capitals)
        0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3
    };

    // Expected terms: lowercase, no diaeresis/accents, regular sigma at the end.
    // "kai" has no counterpart here (presumably removed as a stop word).
    const uint8_t term1[] = { // "proionta"
        0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1
    };
    const uint8_t term2[] = { // "pollaples"
        0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5,
        0xcf, 0x83
    };
    const uint8_t term3[] = { // "anagkes"
        0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, 0xcf, 0x83
    };

    // Check that small letters with diaeresis are analyzed correctly and that
    // punctuation marks are eliminated.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesTo(analyzer, text,
                    newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                          UTF8_TO_STRING(term3)));
}

/// Accented capitals, capitals with diaeresis and stop words: seven input
/// words must be reduced to four expected terms.
TEST_F(GreekAnalyzerTest, testAnalyzer3) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input as raw UTF-8 bytes, grouped by word (ASCII transliteration in comments).
    const uint8_t sentence[] = {
        // "PROYPOTHESEIS" (all capitals, upsilon with diaeresis) + two spaces
        0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95,
        0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3,
        0x20, 0x20,
        // "Apsogos," (accented capital alpha, final sigma, trailing comma)
        0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20,
        // "o"
        0xce, 0xbf, 0x20,
        // "mestos" (accented omicron, final sigma)
        0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20,
        // "kai"
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20,
        // "oi"
        0xce, 0xbf, 0xce, 0xb9, 0x20,
        // "alloi" (accented alpha)
        0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9
    };

    // Expected terms: lowercase, unaccented, regular sigma in final position.
    // "o", "kai" and "oi" are the words with no counterpart in this list.
    const uint8_t term1[] = { // "proupotheseis"
        0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5,
        0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83
    };
    const uint8_t term2[] = { // "apsogos"
        0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83
    };
    const uint8_t term3[] = { // "mestos"
        0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83
    };
    const uint8_t term4[] = { // "alloi"
        0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9
    };

    // Check that capital accented letters and capital letters with diaeresis
    // are analyzed correctly, and that stop words are eliminated.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesTo(analyzer, text,
                    newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                          UTF8_TO_STRING(term3), UTF8_TO_STRING(term4)));
}

/// Same sentence and expected terms as the capitals/accents analyzer case, but
/// verified through checkAnalyzesToReuse (presumably exercising the analyzer's
/// reusable token stream -- confirm in the fixture).
TEST_F(GreekAnalyzerTest, testReusableTokenStream1) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input sentence as raw UTF-8 bytes, one word per group (ASCII
    // transliteration in the comment); 0x20 is the separating space.
    const uint8_t sentence[] = {
        // "Mia" (capital mu, accented iota)
        0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20,
        // "exairetika" (accented final alpha)
        0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9,
        0xce, 0xba, 0xce, 0xac, 0x20,
        // "kali" (accented eta)
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20,
        // "kai"
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20,
        // "plousia" (accented upsilon)
        0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20,
        // "seira" (accented alpha)
        0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20,
        // "charaktiron" (accented eta)
        0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81,
        0xcf, 0x89, 0xce, 0xbd, 0x20,
        // "tis" (final sigma)
        0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20,
        // "Ellinikis" (capital epsilon, accented eta, final sigma)
        0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae,
        0xcf, 0x82, 0x20,
        // "glossas" (accented omega, final sigma)
        0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82
    };

    // Expected terms: lowercase, unaccented, and with the final-sigma bytes
    // (0xcf 0x82) replaced by the regular sigma (0xcf 0x83). "kai" and "tis"
    // have no counterpart here (presumably removed as stop words).
    const uint8_t term1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; // "mia"
    const uint8_t term2[] = { // "exairetika"
        0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9,
        0xce, 0xba, 0xce, 0xb1
    };
    const uint8_t term3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; // "kali"
    const uint8_t term4[] = { // "plousia"
        0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1
    };
    const uint8_t term5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; // "seira"
    const uint8_t term6[] = { // "charaktiron"
        0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81,
        0xcf, 0x89, 0xce, 0xbd
    };
    const uint8_t term7[] = { // "ellinikis"
        0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb7,
        0xcf, 0x83
    };
    const uint8_t term8[] = { // "glossas"
        0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83
    };

    // Check that capitals and small accented letters are analyzed correctly.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesToReuse(analyzer, text,
                         newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                               UTF8_TO_STRING(term3), UTF8_TO_STRING(term4),
                                               UTF8_TO_STRING(term5), UTF8_TO_STRING(term6),
                                               UTF8_TO_STRING(term7), UTF8_TO_STRING(term8)));
}

/// Diaeresis-and-punctuation input checked through checkAnalyzesToReuse
/// (presumably exercising the analyzer's reusable token stream -- confirm in
/// the fixture). Three terms are expected.
TEST_F(GreekAnalyzerTest, testReusableTokenStream2) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input as raw UTF-8 bytes, grouped by word (ASCII transliteration in comments).
    const uint8_t sentence[] = {
        // "Proionta" (capital pi, iota with diaeresis, accented omicron) + space
        0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1,
        0x20,
        // "(kai)" followed by five spaces
        0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29,
        0x20, 0x20, 0x20, 0x20, 0x20,
        // "[pollaples]" (accented epsilon, final sigma)
        0x5b,
        0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad,
        0xcf, 0x82,
        0x5d,
        // tab, '-', tab
        0x09, 0x2d, 0x09,
        // "ANAGKES" (all capitals)
        0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3
    };

    // Expected terms: lowercase, no diaeresis/accents, regular sigma at the end.
    // "kai" has no counterpart here (presumably removed as a stop word).
    const uint8_t term1[] = { // "proionta"
        0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1
    };
    const uint8_t term2[] = { // "pollaples"
        0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5,
        0xcf, 0x83
    };
    const uint8_t term3[] = { // "anagkes"
        0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, 0xcf, 0x83
    };

    // Check that small letters with diaeresis are analyzed correctly and that
    // punctuation marks are eliminated.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesToReuse(analyzer, text,
                         newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                               UTF8_TO_STRING(term3)));
}

/// Accented capitals, capitals with diaeresis and stop words, checked through
/// checkAnalyzesToReuse (presumably exercising the analyzer's reusable token
/// stream -- confirm in the fixture). Four terms are expected.
TEST_F(GreekAnalyzerTest, testReusableTokenStream3) {
    AnalyzerPtr analyzer = newLucene<GreekAnalyzer>(LuceneVersion::LUCENE_CURRENT);

    // Input as raw UTF-8 bytes, grouped by word (ASCII transliteration in comments).
    const uint8_t sentence[] = {
        // "PROYPOTHESEIS" (all capitals, upsilon with diaeresis) + two spaces
        0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95,
        0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3,
        0x20, 0x20,
        // "Apsogos," (accented capital alpha, final sigma, trailing comma)
        0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20,
        // "o"
        0xce, 0xbf, 0x20,
        // "mestos" (accented omicron, final sigma)
        0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20,
        // "kai"
        0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20,
        // "oi"
        0xce, 0xbf, 0xce, 0xb9, 0x20,
        // "alloi" (accented alpha)
        0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9
    };

    // Expected terms: lowercase, unaccented, regular sigma in final position.
    // "o", "kai" and "oi" are the words with no counterpart in this list.
    const uint8_t term1[] = { // "proupotheseis"
        0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5,
        0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83
    };
    const uint8_t term2[] = { // "apsogos"
        0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83
    };
    const uint8_t term3[] = { // "mestos"
        0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83
    };
    const uint8_t term4[] = { // "alloi"
        0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9
    };

    // Check that capital accented letters and capital letters with diaeresis
    // are analyzed correctly, and that stop words are eliminated.
    const String text = UTF8_TO_STRING(sentence);
    checkAnalyzesToReuse(analyzer, text,
                         newCollection<String>(UTF8_TO_STRING(term1), UTF8_TO_STRING(term2),
                                               UTF8_TO_STRING(term3), UTF8_TO_STRING(term4)));
}
